Correlate embryo rlog expression to ELT-2 ChIP-seq

Import intestine expression data

GFPplus_samples_rlog_counts <- read_table(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/rlog_counts/GFPplus_samples_rlog_counts.tsv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   WBGeneID = col_character(),
##   embryo_GFPplus_rep1 = col_double(),
##   embryo_GFPplus_rep2 = col_double(),
##   embryo_GFPplus_rep3 = col_double(),
##   L1_GFPplus_rep1 = col_double(),
##   L1_GFPplus_rep3 = col_double(),
##   L3_GFPplus_rep1 = col_double(),
##   L3_GFPplus_rep2 = col_double(),
##   L3_GFPplus_rep3 = col_double()
## )

Import intestine vs non-intestine expression data

res_embryoGFPplus_vs_embryoGFPminus_df <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/pairwise_DE_results/res_embryoGFPplus_vs_embryoGFPminus.csv", col_names = TRUE)
## Rows: 16762 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): WBGeneID
## dbl (6): baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
res_L1GFPplus_vs_L1GFPminus_df <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/pairwise_DE_results/res_L1GFPplus_vs_L1GFPminus.csv", col_names = TRUE)
## Rows: 16762 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): WBGeneID
## dbl (6): baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
res_L3GFPplus_vs_L3GFPminus_df <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/pairwise_DE_results/res_L3GFPplus_vs_L3GFPminus.csv", col_names = TRUE)
## Rows: 16762 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): WBGeneID
## dbl (6): baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Import intestine categories

embryo_intestine_gene_categories <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/intestine_gene_categories/embryo_intestine_gene_categories.csv")
## Rows: 3250 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): WBGeneID, altHyp, intestine_expression
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
L1_intestine_gene_categories <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/intestine_gene_categories/L1_intestine_gene_categories.csv")
## Rows: 3400 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): WBGeneID, altHyp, intestine_expression
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
L3_intestine_gene_categories <- read_csv(file = "../../03_emb_L1_L3_intestine_RNAseq/03_output/intestine_gene_categories/L3_intestine_gene_categories.csv")
## Rows: 2646 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): WBGeneID, altHyp, intestine_expression
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Import promoter ChIP-seq data

L1.promoters.hilo <- read.table(file = "../../../David/01_promoters/03_output/L1.promoters.hilo.tsv") %>% rownames_to_column(var = "WBGeneID") %>% select(WBGeneID:IDR_nlogq)
L3.promoters.hilo <- read.table(file = "../../../David/01_promoters/03_output/L3.promoters.hilo.tsv") %>% rownames_to_column(var = "WBGeneID") %>% select(WBGeneID:IDR_nlogq)
LE.promoters.hilo <- read.table(file = "../../../David/01_promoters/03_output/LE.promoters.hilo.tsv") %>% rownames_to_column(var = "WBGeneID") %>% select(WBGeneID:IDR_nlogq)
elt2chip_embryo_intestine_df <- LE.promoters.hilo %>% 
  full_join(res_embryoGFPplus_vs_embryoGFPminus_df, by = "WBGeneID") %>% 
  left_join(embryo_intestine_gene_categories, by = "WBGeneID") %>% 
  left_join(GFPplus_samples_rlog_counts %>% select(WBGeneID, contains("embryo")), by = "WBGeneID") %>% 
  drop_na(baseMean) %>%
  rowwise() %>%
  mutate(mean_rlog = mean(embryo_GFPplus_rep1, embryo_GFPplus_rep2, embryo_GFPplus_rep3))

elt2chip_L1_intestine_df <- L1.promoters.hilo %>% 
  full_join(res_L1GFPplus_vs_L1GFPminus_df, by = "WBGeneID") %>% 
  left_join(L1_intestine_gene_categories, by = "WBGeneID") %>% 
  left_join(GFPplus_samples_rlog_counts %>% select(WBGeneID, contains("L1")), by = "WBGeneID") %>% 
  drop_na(baseMean) %>%
  rowwise() %>%
  mutate(mean_rlog = mean(L1_GFPplus_rep1, L1_GFPplus_rep3)) %>%
  filter(log2FoldChange > -40)

elt2chip_L3_intestine_df <- L3.promoters.hilo %>% 
  full_join(res_L3GFPplus_vs_L3GFPminus_df, by = "WBGeneID") %>% 
  left_join(L3_intestine_gene_categories, by = "WBGeneID") %>% 
  left_join(GFPplus_samples_rlog_counts %>% select(WBGeneID, contains("L3")), by = "WBGeneID") %>% 
  drop_na(baseMean) %>%
  rowwise() %>%
  mutate(mean_rlog = mean(L3_GFPplus_rep1, L3_GFPplus_rep2, L3_GFPplus_rep3))

Correlate normalized read counts in intestine RNA-seq samples to ELT-2 ChIP-seq promoter reads

Hypothesis: Promoter localized ELT-2 ChIP-seq signal is positively correlated with intestine gene expression Setup: Use pearson correlation coefficient to measure correlation. Measure for all genes and genes that are intestine enriched.

Write plotting function

regression_IDRpeak_rlog_intestine_type <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(IDR_mean, intestine_expression) %>% filter(intestine_expression != "depleted")
in_df_na_filt %>%
  ggplot(aes(x = log10(IDR_mean), y = mean_rlog)) +
  geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  geom_text(hjust = 0, x =  min(log10(in_df_na_filt$IDR_mean)), y =  max(in_df_na_filt$mean_rlog), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = ""))) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
  facet_grid(.~intestine_expression) + 
  theme_classic() +
  ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}

regression_IDRpeak_rlog_all_genes <- function(in_df){
  in_df_na_filt <- in_df %>% drop_na(IDR_mean) 
  in_df_na_filt%>%
  ggplot(aes(x = log10(IDR_mean), y = mean_rlog)) +
  geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  annotate("text",x =  min(log10(in_df_na_filt$IDR_mean)), y =  max(in_df_na_filt$mean_rlog), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
  theme_classic() +
  ggtitle(paste("correlation with all genes\ndata: ", deparse(substitute(in_df)), sep = ""))
}

regression_promSignal_rlog_intestine_type <- function(in_df){
  in_df_na_filt <- in_df %>% drop_na(intestine_expression, log_chip_signal_mean) %>% filter(intestine_expression != "depleted")
  in_df_na_filt %>%
  ggplot(aes(x = log_chip_signal_mean, y= mean_rlog)) +
  geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
  geom_text(x =  min(in_df_na_filt$log_chip_signal_mean), y =  max(in_df_na_filt$mean_rlog), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = "")), hjust = 0) +
  facet_grid(.~intestine_expression) + 
  theme_classic() +
  ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}


regression_promSignal_rlog_all_genes <- function(in_df){
  in_df_na_filt<- in_df %>% drop_na(log_chip_signal_mean)
  in_df_na_filt %>%
  ggplot(aes(x = log_chip_signal_mean, y= mean_rlog)) +
  geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$mean_rlog)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$mean_rlog)) +
  annotate("text",x = min(in_df_na_filt$log_chip_signal_mean), y = max(in_df_na_filt$mean_rlog), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
  theme_classic() +
  ggtitle(paste("data: ", deparse(substitute(in_df)), sep = "")) +
  coord_cartesian(xlim = c(min(in_df_na_filt$log_chip_signal_mean), max(in_df_na_filt$log_chip_signal_mean)),
                  ylim = c(min(in_df_na_filt$mean_rlog), max(in_df_na_filt$mean_rlog))
                  )
}

Embryo correlation

regression_IDRpeak_rlog_intestine_type(elt2chip_embryo_intestine_df)

# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_Bound.pdf", width = 6, height = 3, dpi = 300)
regression_IDRpeak_rlog_all_genes(elt2chip_embryo_intestine_df)

# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_Bound.pdf", width = 6, height = 3, dpi = 300)
regression_promSignal_rlog_intestine_type(elt2chip_embryo_intestine_df)

# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_All_Promoters.pdf", width = 6, height = 3, dpi = 300)
regression_promSignal_rlog_all_genes(elt2chip_embryo_intestine_df)

# ggsave(filename = "../03_output/PearsonCorrelation_embryoGFPplus_vs_ELT2_ChIP_Seq_Bound.pdf", width = 6, height = 3, dpi = 300)

L1 correlation

regression_promSignal_rlog_all_genes(elt2chip_L1_intestine_df)

regression_promSignal_rlog_intestine_type(elt2chip_L1_intestine_df)

regression_IDRpeak_rlog_all_genes(elt2chip_L1_intestine_df)

regression_IDRpeak_rlog_intestine_type(elt2chip_L1_intestine_df)

L3 correlation

regression_promSignal_rlog_all_genes(elt2chip_L3_intestine_df)

regression_promSignal_rlog_intestine_type(elt2chip_L3_intestine_df)

regression_IDRpeak_rlog_all_genes(elt2chip_L3_intestine_df)

regression_IDRpeak_rlog_intestine_type(elt2chip_L3_intestine_df)

elt2chip_embryo_intestine_df %>% 
  ggplot(aes(x = mean_rlog)) +
  geom_histogram() +
  # geom_point(shape = 19, size = 0.5, alpha = 0.1) +
  # geom_smooth(method = "lm", formula = y~x) +
  # stat_summary(fun.data = "mean_cl_boot") +
  # ggpubr::stat_cor(method = "pearson") +
  facet_grid(~intestine_expression)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

elt2chip_embryo_intestine_df %>% 
  ggplot(aes(x = log_chip_signal_mean)) +
  geom_histogram() +
  # geom_point(shape = 19, size = 0.5, alpha = 0.1) +
  # geom_smooth(method = "lm", formula = y~x) +
  # stat_summary(fun.data = "mean_cl_boot") +
  # ggpubr::stat_cor(method = "pearson") +
  facet_grid(~intestine_expression)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1135 rows containing non-finite values (stat_bin).

Correlate log2FoldChange to to ELT-2 ChIP-seq

Write plotting function

regression_IDRpeak_LFC_intestine_type <- function(in_df){
in_df_na_filt <- in_df %>% drop_na(IDR_mean, intestine_expression)  %>% filter(intestine_expression != "depleted")
in_df_na_filt %>%
  ggplot(aes(x = log10(IDR_mean), y = log2FoldChange)) +
  geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  geom_text(hjust = 0, x =  min(log10(in_df_na_filt$IDR_mean)), y =  max(in_df_na_filt$log2FoldChange), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = ""))) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
  facet_grid(.~intestine_expression) + 
  theme_classic() +
  ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}

regression_IDRpeak_LFC_all_genes <- function(in_df){
  in_df_na_filt <- in_df %>% drop_na(IDR_mean) 
  in_df_na_filt%>%
  ggplot(aes(x = log10(IDR_mean), y = log2FoldChange)) +
  geom_point(shape = 19, size = 0.5, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  annotate("text",x =  min(log10(in_df_na_filt$IDR_mean)), y =  max(in_df_na_filt$log2FoldChange), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
  theme_classic() +
  ggtitle(paste("correlation with all genes\ndata: ", deparse(substitute(in_df)), sep = ""))
}

regression_promSignal_LFC_intestine_type <- function(in_df){
  in_df_na_filt <- in_df %>% drop_na(intestine_expression, log_chip_signal_mean) %>% filter(intestine_expression != "depleted")
  in_df_na_filt %>%
  ggplot(aes(x = log_chip_signal_mean, y= log2FoldChange)) +
  geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
  geom_text(x =  min(in_df_na_filt$log_chip_signal_mean), y =  max(in_df_na_filt$log2FoldChange), aes(label = gene_total), data = in_df_na_filt %>% group_by(intestine_expression) %>% summarise(gene_total = paste0("# genes = ",n(), sep = "")), hjust = 0) +
  facet_grid(.~intestine_expression) + 
  theme_classic() +
  ggtitle(paste("data: ", deparse(substitute(in_df)), sep = ""))
}

regression_promSignal_LFC_all_genes <- function(in_df){
  in_df_na_filt<- in_df %>% drop_na(log_chip_signal_mean)
  in_df_na_filt %>%
  ggplot(aes(x = log_chip_signal_mean, y= log2FoldChange)) +
  geom_point(shape = 19, size = 0.25, alpha = 0.25, color = "black") +
  geom_smooth(method = "lm", formula = y~x) +
  ggpubr::stat_cor(method = "pearson", label.y = 0.9*max(in_df_na_filt$log2FoldChange)) +
  ggpubr::stat_regline_equation(label.y = 0.8*max(in_df_na_filt$log2FoldChange)) +
  annotate("text",x = min(in_df_na_filt$log_chip_signal_mean), y = max(in_df_na_filt$log2FoldChange), label = paste0("# genes = ",nrow(in_df_na_filt), sep = ""), hjust = 0) +
  theme_classic() +
  ggtitle(paste("correlation with all genes\ndata: ", deparse(substitute(in_df)), sep = "")) +
  coord_cartesian(xlim = c(min(in_df_na_filt$log_chip_signal_mean), max(in_df_na_filt$log_chip_signal_mean)),
                  ylim = c(min(in_df_na_filt$log2FoldChange), max(in_df_na_filt$log2FoldChange))
                  )
}

Embryo log2foldchange correlation

regression_IDRpeak_LFC_all_genes(elt2chip_embryo_intestine_df)

regression_IDRpeak_LFC_intestine_type(elt2chip_embryo_intestine_df)

regression_promSignal_LFC_all_genes(elt2chip_embryo_intestine_df)

regression_promSignal_LFC_intestine_type(elt2chip_embryo_intestine_df)

L1 log2foldchange correlation

regression_IDRpeak_LFC_all_genes(elt2chip_L1_intestine_df)

regression_IDRpeak_LFC_intestine_type(elt2chip_L1_intestine_df)

regression_promSignal_LFC_all_genes(elt2chip_L1_intestine_df)

regression_promSignal_LFC_intestine_type(elt2chip_L1_intestine_df)

L3 log2foldchange correlation

regression_IDRpeak_LFC_all_genes(elt2chip_L3_intestine_df)

regression_IDRpeak_LFC_intestine_type(elt2chip_L3_intestine_df)

regression_promSignal_LFC_all_genes(elt2chip_L3_intestine_df)

regression_promSignal_LFC_intestine_type(elt2chip_L3_intestine_df)